@inproceedings{bernstein_discovering_2009,
  author    = {Julius Volz and
               Christian Bizer and
               Martin Gaedke and
               Georgi Kobilarov},
  title     = {Discovering and Maintaining Links on the Web of Data},
  booktitle = {International Semantic Web Conference},
  year      = {2009},
  pages     = {650--665},
  ee        = {http://dx.doi.org/10.1007/978-3-642-04930-9_41},
  crossref  = {DBLP:conf/semweb/2009},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}

@inproceedings{Niu:2011:ZWC:2063076.2063091,
 author = {Niu, Xing and Sun, Xinruo and Wang, Haofen and Rong, Shu and Qi, Guilin and Yu, Yong},
 title = {{Zhishi.me}: Weaving {Chinese} Linking Open Data},
 booktitle = {Proceedings of the 10th international conference on The semantic web - Volume Part II},
 series = {ISWC'11},
 year = {2011},
 isbn = {978-3-642-25092-7},
 location = {Bonn, Germany},
 pages = {205--220},
 numpages = {16},
 url = {http://dl.acm.org/citation.cfm?id=2063076.2063091},
 acmid = {2063091},
 publisher = {Springer-Verlag},
 address = {Berlin, Heidelberg},
}
@inproceedings{Song:2011:AGD:2063016.2063058,
  author    = {Dezhao Song and
               Jeff Heflin},
  title     = {Automatically Generating Data Linkages Using a Domain-Independent
               Candidate Selection Approach},
  booktitle = {International Semantic Web Conference (1)},
  year      = {2011},
  pages     = {649--664},
  ee        = {http://dx.doi.org/10.1007/978-3-642-25073-6_41},
  crossref  = {DBLP:conf/semweb/2011-1},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}

@inproceedings{DBLP:conf/sigmod/TalukdarIP10,
 author    = {Partha Pratim Talukdar and
              Zachary G. Ives and
              Fernando Pereira},
 title     = {Automatically incorporating new sources in keyword search-based
              data integration},
 booktitle = {SIGMOD Conference},
 year      = {2010},
 pages     = {387--398},
 ee        = {http://doi.acm.org/10.1145/1807167.1807211},
 crossref  = {DBLP:conf/sigmod/2010},
 bibsource = {DBLP, http://dblp.uni-trier.de}
}
% Same work as the entries keyed _comparative_???? and branting_comparative_2003-1;
% every key is kept so that existing citations keep resolving.
@inproceedings{branting_comparative_2003,
  author    = {Karl Branting},
  title     = {A Comparative Evaluation of Name-Matching Algorithms},
  booktitle = {ICAIL},
  year      = {2003},
  pages     = {224--232},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}

% Typed as misc rather than book: the record carries no publisher (required for
% a book) and only points at a CiteSeerX URL.
% NOTE(review): if the publication venue is known, retype and add it.
@misc{cohen_comparison_2003,
  author   = {Cohen, W. and Ravikumar, P. and Fienberg, S.},
  title    = {A comparison of string metrics for matching names and records},
  month    = aug,
  year     = {2003},
  url      = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.5.9007},
  abstract = {We describe an open-source Java toolkit of methods for matching names and records. We summarize results obtained from using various string distance metrics on the task of matching entity names. These metrics include distance functions proposed by several different communities, such as edit-distance metrics, fast heuristic string comparators, token-based distance metrics, and hybrid methods. We then describe an extension to the toolkit which allows records to be compared.},
  keywords = {java, Matching, names, opensource, street}
}

% NOTE(review): the key suffix says 2009 while the year field says 2011;
% confirm which is intended. The key is left unchanged for existing citations.
@article{hadjieleftheriou_approximate_2009,
  author    = {Marios Hadjieleftheriou and
               Divesh Srivastava},
  title     = {Approximate String Processing},
  journal   = {Foundations and Trends in Databases},
  volume    = {2},
  number    = {4},
  year      = {2011},
  pages     = {267--402},
  ee        = {http://dx.doi.org/10.1561/1900000010},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}

@TechReport{yongtao2011,
  author      = {Ma, Yongtao and Tran, Thanh},
  title       = {Unsupervised Learning of Blocking Keys for Web Data Integration},
  institution = {AIFB, Karlsruhe Institute of Technology},
  year        = {2011},
  url         = {https://sites.google.com/site/kimducthanh/publication/unsupervisedBlocking.pdf}
}


% The abstract is kept verbatim from the export, including its HTML sub-tag
% residue and Unicode symbols.
@article{fellegi_theory_1969,
	title = {A Theory for Record Linkage},
	volume = {64},
	copyright = {Copyright © 1969 American Statistical Association},
	issn = {0162-1459},
	url = {http://www.jstor.org/stable/2286061},
	abstract = {A mathematical model is developed to provide a theoretical framework for a computer-oriented solution to the problem of recognizing those records in two files which represent identical persons, objects or events (said to be matched). A comparison is to be made between the recorded characteristics and values in two records (one from each file) and a decision made as to whether or not the members of the comparison-pair represent the same person or event, or whether there is insufficient evidence to justify either of these decisions at stipulated levels of error. These three decisions are referred to as link {(A{\textless}sub{\textgreater}1{\textless}/sub{\textgreater})}, a non-link {(A{\textless}sub{\textgreater}3{\textless}/sub{\textgreater})}, and a possible link {(A{\textless}sub{\textgreater}2{\textless}/sub{\textgreater}).} The first two decisions are called positive dispositions. The two types of error are defined as the error of the decision A{\textless}sub{\textgreater}1{\textless}/sub{\textgreater} when the members of the comparison pair are in fact unmatched, and the error of the decision A{\textless}sub{\textgreater}3{\textless}/sub{\textgreater} when the members of the comparison pair are, in fact matched. The probabilities of these errors are defined as μ = ∑{\textless}sub{\textgreater}γεΓ{\textless}/sub{\textgreater} {u(γ)P(A{\textless}sub{\textgreater}1{\textless}/sub{\textgreater}∣γ)} and λ = ∑{\textless}sub{\textgreater}γεΓ{\textless}/sub{\textgreater} {m(γ)P(A{\textless}sub{\textgreater}3{\textless}/sub{\textgreater}∣γ)} respectively where u(γ), m(γ) are the probabilities of realizing γ (a comparison vector whose components are the coded agreements and disagreements on each characteristic) for unmatched and matched record pairs respectively. The summation is over the whole comparison space Γ of possible realizations. 
A linkage rule assigns probabilities {P(A{\textless}sub{\textgreater}1{\textless}/sub{\textgreater}∣γ)}, and {P(A{\textless}sub{\textgreater}2{\textless}/sub{\textgreater}∣γ)}, and {P(A{\textless}sub{\textgreater}3{\textless}/sub{\textgreater}∣γ)} to each possible realization of γ ε Γ. An optimal linkage rule L(μ, λ, Γ) is defined for each value of (μ, λ) as the rule that minimizes {P(A{\textless}sub{\textgreater}2{\textless}/sub{\textgreater})} at those error levels. In other words, for fixed levels of error, the rule minimizes the probability of failing to make positive dispositions. A theorem describing the construction and properties of the optimal linkage rule and two corollaries to the theorem which make it a practical working tool are given.},
	number = {328},
	journal = {Journal of the American Statistical Association},
	author = {Fellegi, Ivan P. and Sunter, Alan B.},
	year = {1969},
	pages = {1183--1210}
}

% Typed as inproceedings: the record is a conference paper (booktitle ASWC,
% crossref to a conference volume). The last author is written in comma form
% so that the two-word surname De Roeck is parsed as the surname.
@inproceedings{gomez-perez_overcoming_2009,
  author    = {Andriy Nikolov and
               Victoria S. Uren and
               Enrico Motta and
               De Roeck, Anne N.},
  title     = {Overcoming Schema Heterogeneity between Linked Semantic
               Repositories to Improve Coreference Resolution},
  booktitle = {ASWC},
  year      = {2009},
  pages     = {332--346},
  ee        = {http://dx.doi.org/10.1007/978-3-642-10871-6_23},
  crossref  = {DBLP:conf/aswc/2009},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}

@inproceedings{melnik_similarity_2002,
  author    = {Sergey Melnik and
               Hector Garcia-Molina and
               Erhard Rahm},
  title     = {Similarity Flooding: A Versatile Graph Matching Algorithm
               and Its Application to Schema Matching},
  booktitle = {ICDE},
  year      = {2002},
  pages     = {117--128},
  ee        = {http://doi.ieeecomputersociety.org/10.1109/ICDE.2002.994702},
  crossref  = {DBLP:conf/icde/2002},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}

% Journal article by Peng, Long and Ding in IEEE TPAMI, volume 27 (August 2005).
@article{hanchuan_peng_feature_2005,
  author  = {Hanchuan Peng and Fuhui Long and Ding, C.},
  title   = {Feature selection based on mutual information criteria of max-dependency, max-relevance, and min-redundancy},
  journal = {{IEEE} Transactions on Pattern Analysis and Machine Intelligence},
  volume  = {27},
  pages   = {1226--1238},
  month   = aug,
  year    = {2005},
  issn    = {0162-8828},
  doi     = {10.1109/TPAMI.2005.159},
  url     = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=1453511}
}

@inproceedings{mccallum_efficient_2000,
  author    = {Andrew McCallum and
               Kamal Nigam and
               Lyle H. Ungar},
  title     = {Efficient clustering of high-dimensional data sets with
               application to reference matching},
  booktitle = {KDD},
  year      = {2000},
  pages     = {169--178},
  ee        = {http://doi.acm.org/10.1145/347090.347123},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}


@inproceedings{wang_structure-based_2010,
  author    = {Ying Wang and
               Weiru Liu and
               David A. Bell},
  title     = {A Structure-Based Similarity Spreading Approach for Ontology
               Matching},
  booktitle = {SUM},
  year      = {2010},
  pages     = {361--374},
  ee        = {http://dx.doi.org/10.1007/978-3-642-15951-0_33},
  crossref  = {DBLP:conf/sum/2010},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}

@article{bizer_linked_2009,
  author    = {Christian Bizer and
               Tom Heath and
               Tim Berners-Lee},
  title     = {Linked Data - The Story So Far},
  journal   = {Int. J. Semantic Web Inf. Syst.},
  volume    = {5},
  number    = {3},
  year      = {2009},
  pages     = {1--22},
  ee        = {http://www.igi-global.com/articles/details.asp?ID=35386},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}

% Same work as das_sarma_bootstrapping_2008; this key is kept for existing
% citations. Typed as inproceedings because misc ignores booktitle and pages.
% The first author is in comma form so the surname Das Sarma is parsed whole.
@inproceedings{_bootstrapping_????,
  author    = {Das Sarma, Anish and
               Xin Dong and
               Alon Y. Halevy},
  title     = {Bootstrapping pay-as-you-go data integration systems},
  booktitle = {SIGMOD Conference},
  year      = {2008},
  pages     = {861--874},
  ee        = {http://doi.acm.org/10.1145/1376616.1376702},
  crossref  = {DBLP:conf/sigmod/2008},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}

% Same work as juanzi_li_rimom:_2009; this key is kept for existing citations.
% Typed as article because misc ignores journal, volume, number and pages.
@article{_ieee_????,
  author    = {Juanzi Li and
               Jie Tang and
               Yi Li and
               Qiong Luo},
  title     = {{RiMOM}: A Dynamic Multistrategy Ontology Alignment Framework},
  journal   = {IEEE Trans. Knowl. Data Eng.},
  volume    = {21},
  number    = {8},
  year      = {2009},
  pages     = {1218--1232},
  ee        = {http://dx.doi.org/10.1109/TKDE.2008.202},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}

% Same work as dorneles_approximate_2011; this key is kept for existing citations.
@article{dorneles_approximate_2010,
  author    = {Carina F. Dorneles and
               Rodrigo Gon{\c{c}}alves and
               Ronaldo dos Santos Mello},
  title     = {Approximate data instance matching: a survey},
  journal   = {Knowl. Inf. Syst.},
  volume    = {27},
  number    = {1},
  year      = {2011},
  pages     = {1--21},
  ee        = {http://dx.doi.org/10.1007/s10115-010-0285-0},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}

@inproceedings{jensen_why_2004,
  author    = {David Jensen and
               Jennifer Neville and
               Brian Gallagher},
  title     = {Why collective inference improves relational classification},
  booktitle = {KDD},
  year      = {2004},
  pages     = {593--598},
  ee        = {http://doi.acm.org/10.1145/1014052.1014125},
  crossref  = {DBLP:conf/kdd/2004},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}

@article{arampatzis_modeling_2010,
  author    = {Avi Arampatzis and
               Stephen Robertson},
  title     = {Modeling score distributions in information retrieval},
  journal   = {Inf. Retr.},
  volume    = {14},
  number    = {1},
  year      = {2011},
  pages     = {26--46},
  ee        = {http://dx.doi.org/10.1007/s10791-010-9145-5},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}

% Typed as inproceedings: the record has booktitle, pages and a conference
% crossref, all of which the misc type ignores.
@inproceedings{_lesson_????,
  author    = {Sylvia C. Wong and
               Richard M. Crowder and
               Gary B. Wills and
               Nigel R. Shadbolt},
  title     = {Lesson learnt from a large-scale industrial semantic web
               application},
  booktitle = {Hypertext},
  year      = {2007},
  pages     = {21--30},
  ee        = {http://doi.acm.org/10.1145/1286240.1286246},
  crossref  = {DBLP:conf/ht/2007},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}

@inproceedings{papadakis_efficient_2011,
  author    = {George Papadakis and
               Wolfgang Nejdl},
  title     = {Efficient entity resolution methods for heterogeneous information
               spaces},
  booktitle = {ICDE Workshops},
  year      = {2011},
  pages     = {304--307},
  ee        = {http://dx.doi.org/10.1109/ICDEW.2011.5767671},
  crossref  = {DBLP:conf/icde/2011w},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}

@article{budanitsky_evaluating_2006,
  author    = {Alexander Budanitsky and
               Graeme Hirst},
  title     = {Evaluating {WordNet}-based Measures of Lexical Semantic Relatedness},
  journal   = {Computational Linguistics},
  volume    = {32},
  number    = {1},
  year      = {2006},
  pages     = {13--47},
  ee        = {http://dx.doi.org/10.1162/coli.2006.32.1.13},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}

% The issue number is taken from the URL path (volume 84, issue 4, page 327).
@article{tversky_features_1977,
  author  = {Tversky, Amos},
  title   = {Features of similarity},
  journal = {Psychological Review},
  volume  = {84},
  number  = {4},
  pages   = {327--352},
  year    = {1977},
  issn    = {0033-295X},
  doi     = {10.1037/0033-295X.84.4.327},
  url     = {http://content.apa.org/journals/rev/84/4/327}
}

% Same work as the entry keyed _ieee_????; both keys are kept for existing citations.
@article{juanzi_li_rimom:_2009,
  author    = {Juanzi Li and
               Jie Tang and
               Yi Li and
               Qiong Luo},
  title     = {{RiMOM}: A Dynamic Multistrategy Ontology Alignment Framework},
  journal   = {IEEE Trans. Knowl. Data Eng.},
  volume    = {21},
  number    = {8},
  year      = {2009},
  pages     = {1218--1232},
  ee        = {http://dx.doi.org/10.1109/TKDE.2008.202},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}

% Typed as inproceedings: the record has a booktitle and a conference crossref
% but no journal, which the article type requires.
@inproceedings{hernandez_merge/purge_1995,
  author    = {Mauricio A. Hern{\'a}ndez and
               Salvatore J. Stolfo},
  title     = {The Merge/Purge Problem for Large Databases},
  booktitle = {SIGMOD Conference},
  year      = {1995},
  pages     = {127--138},
  ee        = {http://doi.acm.org/10.1145/223784.223807, db/conf/sigmod/sigmod95-9.html},
  crossref  = {DBLP:conf/sigmod/95},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}

% Same work as branting_comparative_2003 and branting_comparative_2003-1; this
% key is kept for existing citations. Typed as inproceedings because misc
% ignores booktitle and pages.
@inproceedings{_comparative_????,
  author    = {Karl Branting},
  title     = {A Comparative Evaluation of Name-Matching Algorithms},
  booktitle = {ICAIL},
  year      = {2003},
  pages     = {224--232},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}

@article{elmagarmid_duplicate_2007,
  author    = {Ahmed K. Elmagarmid and
               Panagiotis G. Ipeirotis and
               Vassilios S. Verykios},
  title     = {Duplicate Record Detection: A Survey},
  journal   = {IEEE Trans. Knowl. Data Eng.},
  volume    = {19},
  number    = {1},
  year      = {2007},
  pages     = {1--16},
  ee        = {http://dx.doi.org/10.1109/TKDE.2007.250581},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}

@incollection{spaccapietra_survey_2005,
  author    = {Pavel Shvaiko and
               J{\'e}r{\^o}me Euzenat},
  title     = {A Survey of Schema-Based Matching Approaches},
  booktitle = {J. Data Semantics IV},
  year      = {2005},
  pages     = {146--171},
  ee        = {http://dx.doi.org/10.1007/11603412_5},
  crossref  = {DBLP:journals/jods/2005-4},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}

% Same work as branting_comparative_2003 and the entry keyed _comparative_????;
% this key is kept so that existing citations keep resolving.
@inproceedings{branting_comparative_2003-1,
  author    = {Karl Branting},
  title     = {A Comparative Evaluation of Name-Matching Algorithms},
  booktitle = {ICAIL},
  year      = {2003},
  pages     = {224--232},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}

% Same work as xiao_top-k_2009-1; both keys are kept for existing citations.
@inproceedings{xiao_top-k_2009,
  author    = {Chuan Xiao and
               Wei Wang and
               Xuemin Lin and
               Haichuan Shang},
  title     = {Top-k Set Similarity Joins},
  booktitle = {ICDE},
  year      = {2009},
  pages     = {916--927},
  ee        = {http://dx.doi.org/10.1109/ICDE.2009.111},
  crossref  = {DBLP:conf/icde/2009},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}

% Same work as dorneles_approximate_2010; both keys are kept for existing citations.
@article{dorneles_approximate_2011,
  author    = {Carina F. Dorneles and
               Rodrigo Gon{\c{c}}alves and
               Ronaldo dos Santos Mello},
  title     = {Approximate data instance matching: a survey},
  journal   = {Knowl. Inf. Syst.},
  volume    = {27},
  number    = {1},
  year      = {2011},
  pages     = {1--21},
  ee        = {http://dx.doi.org/10.1007/s10115-010-0285-0},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}

% The DOI previously stored in the note field now lives in the doi field
% (bare, unescaped), matching the other entries in this file that carry a doi.
@incollection{araujo_carbon:_2010,
  author    = {Araujo, Samur and Gao, Qi and Leonardi, Erwin and Houben, {Geert-Jan}},
  editor    = {Benatallah, Boualem and Casati, Fabio and Kappel, Gerti and Rossi, Gustavo},
  title     = {Carbon: {Domain-Independent} Automatic Web Form Filling},
  booktitle = {Web Engineering},
  series    = {Lecture Notes in Computer Science},
  volume    = {6189},
  publisher = {Springer Berlin / Heidelberg},
  year      = {2010},
  pages     = {292--306},
  doi       = {10.1007/978-3-642-13911-6_20},
  url       = {http://dx.doi.org/10.1007/978-3-642-13911-6_20}
}

@inproceedings{han_structural_2010,
  author    = {Xianpei Han and
               Jun Zhao},
  title     = {Structural Semantic Relatedness: A Knowledge-Based Method
               to Named Entity Disambiguation},
  booktitle = {ACL},
  year      = {2010},
  pages     = {50--59},
  ee        = {http://www.aclweb.org/anthology/P10-1006},
  crossref  = {DBLP:conf/acl/2010},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}

@article{hu_bootstrapping_2011,
  author    = {Wei Hu and
               Yuzhong Qu and
               Xingzhi Sun},
  title     = {Bootstrapping Object Coreferencing on the Semantic Web},
  journal   = {J. Comput. Sci. Technol.},
  volume    = {26},
  number    = {4},
  year      = {2011},
  pages     = {663--675},
  ee        = {http://dx.doi.org/10.1007/s11390-011-1166-z},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}

% Same work as the entry keyed _bootstrapping_????; both keys are kept for
% existing citations. The first author is in comma form so the two-word
% surname Das Sarma is parsed whole.
@inproceedings{das_sarma_bootstrapping_2008,
  author    = {Das Sarma, Anish and
               Xin Dong and
               Alon Y. Halevy},
  title     = {Bootstrapping pay-as-you-go data integration systems},
  booktitle = {SIGMOD Conference},
  year      = {2008},
  pages     = {861--874},
  ee        = {http://doi.acm.org/10.1145/1376616.1376702},
  crossref  = {DBLP:conf/sigmod/2008},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}

% arXiv preprint: the identifier in the volume field (abs/1107.1104) is also
% exposed through eprint and archiveprefix. The accented surname uses a LaTeX
% escape, like the other accented names in this file.
@article{araujo_serimi_2011,
  author        = {Ara{\'u}jo, Samur and Hidders, Jan and Schwabe, Daniel and de Vries, Arjen P.},
  title         = {{SERIMI} - Resource Description Similarity, {RDF} Instance Matching and Interlinking},
  journal       = {{CoRR}},
  volume        = {abs/1107.1104},
  year          = {2011},
  eprint        = {1107.1104},
  archiveprefix = {arXiv}
}

% Same work as xiao_top-k_2009; this key is kept for existing citations.
@inproceedings{xiao_top-k_2009-1,
  author    = {Chuan Xiao and
               Wei Wang and
               Xuemin Lin and
               Haichuan Shang},
  title     = {Top-k Set Similarity Joins},
  booktitle = {ICDE},
  year      = {2009},
  pages     = {916--927},
  ee        = {http://dx.doi.org/10.1109/ICDE.2009.111},
  crossref  = {DBLP:conf/icde/2009},
  bibsource = {DBLP, http://dblp.uni-trier.de}
}

% WebDB 2011 paper by Isele, Jentzsch and Bizer; the record carries no page range.
@inproceedings{DBLP:conf/webdb/IseleJB11,
  title     = {Efficient Multidimensional Blocking for Link Discovery without
               losing Recall},
  author    = {Robert Isele and
               Anja Jentzsch and
               Christian Bizer},
  year      = {2011},
  booktitle = {WebDB},
  crossref  = {DBLP:conf/webdb/2011},
  bibsource = {DBLP, http://dblp.uni-trier.de},
  ee        = {http://webdb2011.rutgers.edu/papers/Paper\%2039/silk.pdf}
}